In [308]:
import os,sys,json,re
import pandas as pd
import numpy as np

Notes:

The directory containing the .rec files is expected to be named 'day' and to sit inside the working directory; if it lives elsewhere, change the 'day' path used in the code cells below.

Integrity Check of Data Files

In [309]:
# Integrity check of the recording files.
# Works if this notebook is in the working directory of the interpreter;
# otherwise, replace 'day' with the directory in which the .rec files (and logs)
# are located.
file_list = os.listdir('day')

# The dot before "rec" is escaped so that only a literal ".rec" extension
# matches (an unescaped "." would also accept names such as "001-day3Xrec").
r0 = re.compile(r'^.*\.rec$')
r1 = re.compile(r'^[0-9]{3}-(?P<day>day[0-9]+)\.rec$')

day = ''      # day label taken from the first well-formed .rec name
flag = False  # becomes True once at least one .rec file has been seen
for file in file_list:
    if not r0.match(file):
        continue  # not a .rec file (e.g. a log) - ignore it
    flag = True
    results = r1.search(file)  # None when the name is not NNN-dayN.rec
    if not results:
        print("incorrectly formatted .rec")
        print(file)
        break
    if day == '':
        day = results.group('day')
    elif results.group('day') != day:
        # Every recording in the directory must belong to the same day
        print("incorrect day")
        print(file)
        break
if not flag:
    print('No rec files found')
In [310]:
# Names of the .rec files only, in ascending file-name order
rec_list = sorted(name for name in file_list if r0.match(name))

DataFrame Creation

In [311]:
def getval(val):
    """Return ``val`` unchanged.

    Returning a name that is already bound cannot raise, so the bare
    ``try``/``except`` that used to wrap the return was unreachable and has
    been removed; the result is the same for every input.
    """
    return val

# Column accumulators: one entry is appended per 'Mesh' visual (that carries a
# uuid) found in each '_SAVE_LIVEFRAME' record.
# last_rec signifies whether the row is the last one contributed by a .rec file
# - used to accumulate the timestamps across files.
timestamps, uuids, pos_xs, pos_ys = [], [], [], []
neighbor_xs, neighbor_ys, phases, last_rec = [], [], [], []

nano = 0  # offset added to every timestamp read from the current file
for rec in rec_list:
    with open(os.path.join('day', rec)) as f:
        # Stream the file line by line; every non-empty line is one JSON record.
        for line in f:
            if not line.strip():
                continue  # a blank/trailing line would make json.loads raise
            record = json.loads(line)
            # Records without a 'message' key are skipped rather than raising.
            if record.get('message') != '_SAVE_LIVEFRAME':
                continue
            for visual in record['data']['visuals']:
                if 'uuid' not in visual or visual.get('type') != 'Mesh':
                    continue
                # The dashedLine section and its settings are optional;
                # missing pieces yield None in the matching column.
                dashed = visual.get('dashedLine', {})
                settings = dashed.get('settings', {})
                uuids.append(visual['uuid'])
                timestamps.append(record['timestamp'] + nano)
                pos_xs.append(float(visual['position']['x']))
                pos_ys.append(float(visual['position']['y']))
                neighbor_xs.append(float(dashed['x2']) if 'x2' in dashed else None)
                neighbor_ys.append(float(dashed['y2']) if 'y2' in dashed else None)
                phases.append(settings.get('width'))
                last_rec.append(0)
    print(rec)
    if last_rec:
        # Flag the final row gathered so far and continue the next file's
        # timestamps from that point.
        last_rec[-1] = 1
        nano = timestamps[-1]
df_dict = {'uuids': uuids, 'timestamps':timestamps, 'pos_xs':pos_xs, 'pos_ys':pos_ys, 'neighbor_xs':neighbor_xs, 'neighbor_ys':neighbor_ys, 'phases':phases, 'last_rec': last_rec}
df = pd.DataFrame(df_dict)
001-day3.rec
002-day3.rec
003-day3.rec
004-day3.rec
005-day3.rec
006-day3.rec
007-day3.rec
008-day3.rec
009-day3.rec
010-day3.rec
011-day3.rec
012-day3.rec
013-day3.rec
014-day3.rec
015-day3.rec
016-day3.rec
017-day3.rec
018-day3.rec
In [312]:
# Render the assembled frame; for a long frame pandas elides the middle rows
df
Out[312]:
last_rec neighbor_xs neighbor_ys phases pos_xs pos_ys timestamps uuids
0 0 -0.0404 0.2580 1.0 0.1490 -1.4500 41550280 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
1 0 0.1490 -1.4500 15.0 -0.0404 0.2580 41550280 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
2 0 0.1820 0.2180 15.0 0.0134 -1.5200 143818794 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
3 0 0.0134 -1.5200 15.0 0.1820 0.2180 143818794 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
4 0 0.5440 0.1150 15.0 -0.1220 -1.6000 244045742 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
5 0 -0.1220 -1.6000 1.0 0.5440 0.1150 244045742 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
6 0 0.8020 0.0300 15.0 -0.2920 -1.6700 344189521 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
7 0 -0.2920 -1.6700 1.0 0.8020 0.0300 344189521 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
8 0 1.0800 -0.1790 15.0 -0.4910 -1.7500 444909701 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
9 0 -0.4910 -1.7500 1.0 1.0800 -0.1790 444909701 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
10 0 1.2900 -0.3450 15.0 -0.6140 -1.7600 545157676 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
11 0 -0.6140 -1.7600 1.0 1.2900 -0.3450 545157676 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
12 0 1.3900 -0.4710 15.0 -0.7580 -1.7300 644233453 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
13 0 -0.7580 -1.7300 15.0 1.3900 -0.4710 644233453 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
14 0 1.4800 -0.6230 15.0 -1.0100 -1.6000 743945673 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
15 0 -1.0100 -1.6000 15.0 1.4800 -0.6230 743945673 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
16 0 1.5300 -0.8300 15.0 -1.2800 -1.4400 844427787 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
17 0 -1.2800 -1.4400 15.0 1.5300 -0.8300 844427787 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
18 0 1.4900 -0.9990 15.0 -1.4600 -1.3100 940932000 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
19 0 -1.4600 -1.3100 15.0 1.4900 -0.9990 940932000 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
20 0 1.4000 -1.2700 15.0 -1.5600 -1.1700 1043659440 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
21 0 -1.5600 -1.1700 15.0 1.4000 -1.2700 1043659440 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
22 0 1.3300 -1.5200 15.0 -1.5800 -0.9690 1144174869 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
23 0 -1.5800 -0.9690 15.0 1.3300 -1.5200 1144174869 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
24 0 1.1900 -1.7400 15.0 -1.5700 -0.7640 1243361573 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
25 0 -1.5700 -0.7640 15.0 1.1900 -1.7400 1243361573 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
26 0 0.9270 -1.9000 15.0 -1.5200 -0.6010 1340806367 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
27 0 -1.5200 -0.6010 1.0 0.9270 -1.9000 1340806367 B43FDA85-832B-44D2-A8A6-5C0E75C06FE1
28 0 -1.4200 -0.4520 1.0 -3.5500 -2.7100 1440507862 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
29 0 0.7200 -2.0600 15.0 -1.4200 -0.4520 1440507862 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
... ... ... ... ... ... ... ... ...
57705 0 1.8900 -1.7200 15.0 2.3500 -0.0888 2191439152148 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
57706 0 2.3500 -0.0888 15.0 1.8900 -1.7200 2191439152148 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
57707 0 1.8900 -1.7200 30.0 -0.2080 -2.7300 2191439152148 8E82AA58-ACBD-4B47-8C56-9A7E275102DC
57708 0 1.8500 -1.6900 15.0 2.2700 -0.1020 2191539121263 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
57709 0 2.2700 -0.1020 15.0 1.8500 -1.6900 2191539121263 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
57710 0 1.8500 -1.6900 30.0 -0.2030 -2.6800 2191539121263 8E82AA58-ACBD-4B47-8C56-9A7E275102DC
57711 0 1.8200 -1.6500 15.0 2.1100 -0.1340 2191639205183 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
57712 0 2.1100 -0.1340 15.0 1.8200 -1.6500 2191639205183 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
57713 0 1.8200 -1.6500 30.0 -0.1970 -2.6400 2191639205183 8E82AA58-ACBD-4B47-8C56-9A7E275102DC
57714 0 1.8000 -1.6200 15.0 1.7900 -0.2030 2191739640137 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
57715 0 1.7900 -0.2030 15.0 1.8000 -1.6200 2191739640137 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
57716 0 1.8000 -1.6200 30.0 -0.1850 -2.6100 2191739640137 8E82AA58-ACBD-4B47-8C56-9A7E275102DC
57717 0 1.7800 -1.6100 15.0 1.5400 -0.2580 2191839213369 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
57718 0 1.5400 -0.2580 15.0 1.7800 -1.6100 2191839213369 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
57719 0 1.7800 -1.6100 30.0 -0.1690 -2.5900 2191839213369 8E82AA58-ACBD-4B47-8C56-9A7E275102DC
57720 0 1.7700 -1.5900 15.0 1.4500 -0.2590 2191939160879 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
57721 0 1.4500 -0.2590 2.0 1.7700 -1.5900 2191939160879 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
57722 0 1.7700 -1.5900 30.0 -0.1570 -2.5700 2191939160879 8E82AA58-ACBD-4B47-8C56-9A7E275102DC
57723 0 1.7700 -1.5800 15.0 1.3400 -0.2910 2192039180330 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
57724 0 1.3400 -0.2910 2.0 1.7700 -1.5800 2192039180330 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
57725 0 1.7700 -1.5800 30.0 -0.1500 -2.5600 2192039180330 8E82AA58-ACBD-4B47-8C56-9A7E275102DC
57726 0 1.7800 -1.5600 15.0 1.2700 -0.3150 2192139455396 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
57727 0 1.2700 -0.3150 2.0 1.7800 -1.5600 2192139455396 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
57728 0 1.7800 -1.5600 30.0 -0.1420 -2.5400 2192139455396 8E82AA58-ACBD-4B47-8C56-9A7E275102DC
57729 0 1.7900 -1.5700 15.0 1.2900 -0.3090 2192239313239 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
57730 0 1.2900 -0.3090 2.0 1.7900 -1.5700 2192239313239 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
57731 0 1.7900 -1.5700 30.0 -0.1340 -2.5300 2192239313239 8E82AA58-ACBD-4B47-8C56-9A7E275102DC
57732 0 1.7900 -1.6000 15.0 1.1900 -0.3480 2192340188787 B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99
57733 0 1.1900 -0.3480 2.0 1.7900 -1.6000 2192340188787 A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1
57734 1 1.7900 -1.6000 30.0 -0.1330 -2.5300 2192340188787 8E82AA58-ACBD-4B47-8C56-9A7E275102DC

57735 rows × 8 columns

In [313]:
# Distinct uuids across the entire dataframe, paired with how many there are
distinct_uuids = df['uuids'].unique()
distinct_uuids, len(distinct_uuids)
Out[313]:
(array(['A943B4A5-88AC-4E26-83C4-E2F78CD9D1E1',
        'B43FDA85-832B-44D2-A8A6-5C0E75C06FE1',
        'B51BDF6B-A4F1-4284-BD1C-0AFCCDACAE99',
        '8E82AA58-ACBD-4B47-8C56-9A7E275102DC',
        'CE592D97-F593-4DEC-9D7E-03484B07CBA6',
        '177622F4-49B3-4B31-BE17-5FE72243F000',
        '03ACB8CE-058D-4B5E-81AC-C9D2C33C170F',
        '53A05400-71AB-4EFC-B1CD-50FD128721FB',
        'C46E3F94-2BF4-4C31-8837-5933AC54CA44',
        '6B41F94D-B0E4-45F5-822F-123373507D80',
        '1F0799E8-EE21-476D-8CF3-B1F74C904D6F'], dtype=object), 11)
In [314]:
from bokeh.io import output_notebook
from bokeh.plotting import figure, output_file, show
# Load BokehJS so that later show() calls render inside the notebook
output_notebook()
Loading BokehJS ...

Builds the timestamp and velocity series for a single user (the first uuid in the frame) and plots velocity against time.

In [315]:
# Velocity trace for one participant: the first uuid that appears in the frame
test_frame = df['uuids'].unique()[0]
user_rows = df[df['uuids'] == test_frame]
u1t = user_rows['timestamps']

# Row-to-row changes in time and in position for that participant
time_diff = u1t.diff()
x_diff = user_rows['pos_xs'].diff()
y_diff = user_rows['pos_ys'].diff()

# Euclidean step length divided by the elapsed time between rows
dist_diff = np.sqrt(x_diff**2 + y_diff**2)
velocity = dist_diff/time_diff

p = figure(x_axis_label='time', y_axis_label='velocity')
p.line(u1t, velocity)
show(p)
In [316]:
from bokeh.palettes import inferno
xs = []
ys = []
uuid_values = df['uuids'].unique()
for u in uuid_values:
    # Rows of this participant only, selected once and reused below
    rows = df[df['uuids'] == u]
    u1t = rows['timestamps']

    # Euclidean step between consecutive rows over the elapsed time
    step = np.sqrt(rows['pos_xs'].diff()**2 + rows['pos_ys'].diff()**2)
    velocity = step/u1t.diff()

    xs.append(u1t)
    ys.append(velocity)

# The per-user series keep the frame's index, so the concatenation aligns row by row
df['velocity'] = pd.concat(ys)

n_users = len(uuid_values)
p = figure(x_axis_label='time', y_axis_label='velocity', y_range=[0,1e-7])
p.multi_line(xs, ys, color=inferno(n_users), alpha=[.5]*n_users)

# Identifying the transition points of the REC files
cutoff = df[df['last_rec']==1][['velocity', 'timestamps']]
p.circle(cutoff['timestamps'], cutoff['velocity'], fill_alpha=.3, size=7)

show(p)
In [317]:
# Export the current frame to frame.csv, relative to the working directory
df.to_csv('frame.csv')
In [318]:
# Distance between each participant and the neighbor coordinates stored on the
# same row, plotted per participant over time.
xs = []
ys = []
uuid_values = df['uuids'].unique()
for u in uuid_values:
    # Select this participant's rows once instead of re-filtering per column
    rows = df[df['uuids'] == u]

    x_diff = rows['pos_xs'] - rows['neighbor_xs']
    y_diff = rows['pos_ys'] - rows['neighbor_ys']
    dist_neighbor = np.sqrt(x_diff**2 + y_diff**2)

    xs.append(rows['timestamps'])
    ys.append(dist_neighbor)

# The per-user series keep the frame's index, so the concatenation aligns row by row
df['dist_neighbor'] = pd.concat(ys)

n_users = len(uuid_values)
p = figure(x_axis_label='Time', y_axis_label='Distance from Neighbor')
p.multi_line(xs, ys, color=inferno(n_users), alpha=[.5]*n_users)

# Identifying the transition points of the REC files
cutoff = df[df['last_rec']==1][['dist_neighbor', 'timestamps']]
p.circle(cutoff['timestamps'], cutoff['dist_neighbor'], fill_alpha=.3, size=7)

show(p)
In [341]:
# Lookup from the numeric code stored in the 'phases' column to a phase label
phase_dict = {
    1: 'gas',
    30: 'ice',
    15: 'liquid',
    2: 'impossible',
}
In [529]:
from collections import Counter
# Index a copy of the frame by its timestamps so that it can be resampled
# (pandas reads bare integers as nanoseconds by default).
time_df = df.copy().set_index(pd.TimedeltaIndex(df['timestamps']))
time_df['phases'] = time_df['phases'].map(phase_dict)

# Collect every phase label seen within each one-minute bin into a list
time_df_res = time_df.resample('1min')
time_df_res = time_df_res.aggregate({'phases': lambda x: list(x)})
time_df_res = pd.DataFrame(time_df_res)

# Mark bins with no observations as missing and drop them.
# np.nan is used directly: the pandas.np alias is deprecated and no longer
# available in current pandas releases.
time_df_res['phases'] = time_df_res['phases'].apply(lambda x: np.nan if not len(x) else x)

time_df_res = time_df_res[~time_df_res['phases'].isnull()]
def p_phase(x):
    """Return the share of gas / ice / liquid labels in one row's phase list.

    Parameters
    ----------
    x : pandas.Series or sequence
        A row whose first element is the list of phase labels for that row
        (this is what ``DataFrame.apply(..., axis=1)`` passes in).

    Returns
    -------
    pandas.Series
        Entries ``p_gas``, ``p_ice`` and ``p_liquid``. Each is the label's
        count divided by the total number of entries in the list, so labels
        other than these three still count towards the denominator and the
        three shares need not sum to 1. All three are NaN for an empty list.
    """
    # Take the first element by position. A row Series is label-indexed, so
    # plain x[0] relies on positional fallback that pandas has deprecated.
    labels = x.iloc[0] if hasattr(x, 'iloc') else x[0]
    count = Counter(labels)
    sum_val = sum(count.values())
    if sum_val == 0:
        # Nothing observed: report missing values instead of dividing by zero
        nan = float('nan')
        return pd.Series({'p_gas': nan, 'p_ice': nan, 'p_liquid': nan})
    count_dict = {'p_gas': count['gas']/sum_val,
                  'p_ice': count['ice']/sum_val,
                  'p_liquid': count['liquid']/sum_val}
    return pd.Series(count_dict)
In [530]:
# Apply p_phase row by row: one row of phase shares per remaining one-minute bin
phase_prob = time_df_res.apply(p_phase, axis=1)
In [534]:
from bokeh.models.formatters import DatetimeTickFormatter

# Share of observations in each phase per one-minute bin, one line per phase
p = figure(x_axis_type = 'datetime',
           x_axis_label = 'Time',
           y_axis_label = 'Percent of children in Phase')
p.multi_line([phase_prob.index]*3,[phase_prob['p_gas'], phase_prob['p_liquid'], phase_prob['p_ice']],
             line_color=['red', 'green', 'blue'])

# Set the per-scale fields directly: the `formats=` dict is deprecated since
# Bokeh 0.12.3 in favour of individual DatetimeTickFormatter fields.
# NOTE(review): list values match the Bokeh 0.12.x line this notebook ran on;
# newer Bokeh releases may expect plain strings here - confirm on upgrade.
p.xaxis.formatter = DatetimeTickFormatter(hours=["%s"], days=["%s"], months=["%s"], years=["%s"])
show(p)
C:\Users\dmley\Anaconda3\lib\site-packages\bokeh\util\deprecation.py:34: BokehDeprecationWarning: DatetimeTickFormatter.formats was deprecated in Bokeh 0.12.3 and will be removed, use individual DatetimeTickFormatter fields instead.
  warn(message)
In [ ]: